/*
	Purpose: Using the sample of 1940 Census fathers aged 30-50
	         (from 0b), this file creates father income scores
	         at the occupation x race level. 
	         
	Creates: avgincomes_fathers1940_byrace.dta
*/

* Start from an empty dataset and disable output paging so the do-file
* runs without stopping for keyboard input.
clear
set more off

* Relative paths below are resolved against this folder. The global macro
* Mydirectory1 has to be defined before this file is run.
cd "$Mydirectory1/1_DataSources/CensusData/"

* Load the 1940 Census father sample used to build the income scores.
use ./input/Census1940_fathers_ages30to50_forIncomeScores.dta, clear 

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*
	
	* Constant equal to 1 on every row; summing it within a cell gives the
	* number of observations in that cell.
	generate number = 1
	
*******************
*** TEMPLATE
*******************

* occupation x race
* Build a balanced template holding exactly one row for race==1 and one row
* for race==2 within every occupation that appears in the sample. The data
* in memory are preserved and restored, so only the tempfile is left behind.
preserve

	* One row per occupation. The collapsed value of race is only a
	* placeholder and is overwritten just below.
	collapse (min) race, by(occ1950ej)
	
	* Duplicate every occupation row and number the two copies 1 and 2.
	* Both codes are assigned explicitly: overwriting only the second copy
	* leaves two race==2 rows (and no race==1 row) whenever an occupation's
	* minimum race code is 2, and the 1:1 merge on occ1950ej race further
	* down then stops because the template keys are not unique.
	expand 2
	bysort occ1950ej: replace race = _n

	tempfile occbyrace
	save `occbyrace'

restore 

*******************
*** COLLAPSE 
*******************

* Reduce the sample to one row per occupation x race cell: number becomes
* the count of observations in the cell (sum of the constant 1) and each
* income variable becomes its within-cell mean.
	collapse (rawsum) number (mean) incwage fam_income hh_income, by(occ1950ej race)

	tempfile income
	save `income'

* Attach the cell statistics to the occupation x race template. The merge
* indicator is not needed afterwards, so it is not created.
	use `occbyrace'
	merge 1:1 occ1950ej race using `income', nogenerate

* Template cells without a match in the collapsed data have no observations.
	replace number = 0 if number == .

* Stop the do-file if any cell is left without a wage income score.
	assert incwage != .

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

**********
* Save
**********

* Each variable is labelled and then given its final name; a label stays
* attached to its variable when the variable is renamed.

	label variable incwage "Coarse income score, incwage, occ by race"
	rename incwage avg_incwage_1940_byrace

	label variable fam_income "Coarse income score, family income, occ by race"
	rename fam_income avg_faminc_1940_byrace

	label variable hh_income "Coarse income score, household income, occ by race"
	rename hh_income avg_HHinc_1940_byrace

	label variable number "Number of obs in 1940 occ cell"
	rename number number_1940obs

	label variable race "Respondent race"

	* The occupation key is renamed to fatheroccej in the saved file.
	rename occ1950ej fatheroccej

	save ./output/avgincomes_fathers1940_byrace.dta, replace
